Function to calculate the distribution function of the longest run and its mean. p - probability of success, n - number of trials.
library(plotly)
library(ggplot2)
longest_run_distribution <- function(p, n){
mean = 0
E <- function(x){-(p^x) * exp(-(p^x)) * log(p, exp(1))}
find_area <- function(l){
lower_bound = l - log(n*(1-p), 1/p)
upper_bound = lower_bound + 1
area = integrate(E, lower_bound, upper_bound)
return(area[["value"]])
}
probabilities <- vector()
labels <- vector()
for(l in 1:n){
area = find_area(l)
# calculating mean
mean = mean + area * l
if(area > 0.001){
probabilities <- c(probabilities, area)
labels <- c(labels, toString(l))
}
}
print("Mean:")
print(mean)
# plotting
df <- data.frame(text=labels,
num=probabilities)
plot_ly(data = df,
x = ~text,
y = ~num,
type = "bar",
color = I("darkred")
) %>%
layout(
title = "Distribution of the longest run lenghts",
xaxis = list(title = "Length of the longest run",
categoryorder = "array",
categoryarray = ~text),
yaxis = list(title = "Probability", range = c(0,max(probabilities)))
)
}
Function to calculate the empirical distribution function of the longest run of Heads in n tosses of a fair coin and its mean.
longest_run_empirical_distribution <- function(n){
trials = 10000
longest_lens = vector()
for (i in 1:trials) {
coin <- sample(c("H", "T"), n, replace = TRUE)
coin.rle <- rle(coin)
longest_run_len = tapply(coin.rle$lengths, coin.rle$values, max)[1][["H"]]
longest_lens <- c(longest_lens, longest_run_len)
}
#calculating mean
mean = sum(longest_lens)/trials
print("Mean:")
print(mean)
#plotting
plot_ly(x=longest_lens, histfunc='sum', type = "histogram", color = I("orange")) %>%
layout(title = "Empirical distribution of the longest run lenghts",
xaxis = list(title = "Length of the longest run"),
yaxis = list(title = "Frequency"))
}
Distribution of lenght of the longest run of Heads for n = 100 tosses of a fair coin:
p = 1/2
n = 100
longest_run_distribution(p,n)
[1] "Mean:"
[1] 5.976601
Empirical distribution of lenght of the longest run of Heads for n = 100 tosses of a fair coin:
longest_run_empirical_distribution(n)
[1] "Mean:"
[1] 5.9898
Distribution of lenght of the longest run of Heads for n = 1000 tosses of a fair coin:
p = 1/2
n = 1000
longest_run_distribution(p,n)
[1] "Mean:"
[1] 9.298531
Empirical distribution of lenght of the longest run of Heads for n = 1000 tosses of a fair coin:
longest_run_empirical_distribution(n)
[1] "Mean:"
[1] 9.288
Distribution of lenght of the longest run of Heads for n = 10000 tosses of a fair coin:
p = 1/2
n = 10000
longest_run_distribution(p,n)
[1] "Mean:"
[1] 12.62046
Empirical distribution of lenght of the longest run of Heads for n = 10000 tosses of a fair coin:
longest_run_empirical_distribution(n)
[1] "Mean:"
[1] 12.6057
Thus, we can see that cdf corresponds to the empirical cdf. An expected value obtained calculating numerically is close to the true mean. This is justified by the Law of large numbers. LLN says that the average of the results obtained from a large number of trials should be close to the expected value, and will tend to become closer as more trials are performed.
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpGdW5jdGlvbiB0byBjYWxjdWxhdGUgdGhlIGRpc3RyaWJ1dGlvbiBmdW5jdGlvbiBvZiB0aGUgbG9uZ2VzdCBydW4gYW5kIGl0cyBtZWFuLg0KcCAtIHByb2JhYmlsaXR5IG9mIHN1Y2Nlc3MsIG4gLSBudW1iZXIgb2YgdHJpYWxzLg0KDQpgYGB7cn0NCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbG9uZ2VzdF9ydW5fZGlzdHJpYnV0aW9uIDwtIGZ1bmN0aW9uKHAsIG4pew0KICBtZWFuID0gMCANCiAgRSA8LSBmdW5jdGlvbih4KXstKHBeeCkgKiBleHAoLShwXngpKSAqIGxvZyhwLCBleHAoMSkpfQ0KICBmaW5kX2FyZWEgPC0gZnVuY3Rpb24obCl7DQogICAgbG93ZXJfYm91bmQgPSBsIC0gbG9nKG4qKDEtcCksIDEvcCkNCiAgICB1cHBlcl9ib3VuZCA9IGxvd2VyX2JvdW5kICsgMQ0KICAgIGFyZWEgPSBpbnRlZ3JhdGUoRSwgbG93ZXJfYm91bmQsIHVwcGVyX2JvdW5kKSANCiAgICByZXR1cm4oYXJlYVtbInZhbHVlIl1dKQ0KICB9DQogIHByb2JhYmlsaXRpZXMgPC0gdmVjdG9yKCkNCiAgbGFiZWxzIDwtIHZlY3RvcigpDQogIGZvcihsIGluIDE6bil7DQogICAgYXJlYSA9IGZpbmRfYXJlYShsKQ0KICAgIA0KICAgICMgY2FsY3VsYXRpbmcgbWVhbg0KICAgIG1lYW4gPSBtZWFuICsgYXJlYSAqIGwNCiAgICANCiAgICBpZihhcmVhID4gMC4wMDEpew0KICAgICAgcHJvYmFiaWxpdGllcyA8LSBjKHByb2JhYmlsaXRpZXMsIGFyZWEpDQogICAgICBsYWJlbHMgPC0gYyhsYWJlbHMsIHRvU3RyaW5nKGwpKQ0KICAgIH0NCiAgfQ0KICBwcmludCgiTWVhbjoiKQ0KICBwcmludChtZWFuKQ0KICANCiAgIyBwbG90dGluZw0KICBkZiA8LSBkYXRhLmZyYW1lKHRleHQ9bGFiZWxzLA0KICAgICAgICAgICAgICAgICAgbnVtPXByb2JhYmlsaXRpZXMpDQogIA0KICAgICAgcGxvdF9seShkYXRhID0gZGYsDQogICAgICAgeCA9IH50ZXh0LA0KICAgICAgIHkgPSB+bnVtLA0KICAgICAgIHR5cGUgPSAiYmFyIiwNCiAgICAgICBjb2xvciA9IEkoImRhcmtyZWQiKQ0KICApICU+JQ0KICBsYXlvdXQoDQogICAgICAgdGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIHRoZSBsb25nZXN0IHJ1biBsZW5naHRzIiwNCiAgICAgICB4YXhpcyA9IGxpc3QodGl0bGUgPSAiTGVuZ3RoIG9mIHRoZSBsb25nZXN0IHJ1biIsDQogICAgICAgY2F0ZWdvcnlvcmRlciA9ICJhcnJheSIsDQogICAgICAgY2F0ZWdvcnlhcnJheSA9IH50ZXh0KSwNCiAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAiUHJvYmFiaWxpdHkiLCByYW5nZSA9IGMoMCxtYXgocHJvYmFiaWxpdGllcykpKQ0KICApDQp9DQoNCmBgYA0KDQpGdW5jdGlvbiB0byBjYWxjdWxhdGUgdGhlIGVtcGlyaWNhbCBkaXN0cmlidXRpb24gZnVuY3Rpb24gb2YgdGhlIGxvbmdlc3QgcnVuIG9mIEhlYWRzIGluIG4gdG9zc2VzIG9mIGEgZmFpciBjb2luIGFuZCBpdHMgbWVhbi4NCg0KYGBge3J9DQpsb25nZXN0X3J1bl9lbXBpcmljYWxfZGlzdHJpYnV0aW9uIDwtIGZ1bmN0aW9uKG4pew0KICB0cmlhbHMgPSAxMDAwMA0KICBsb25nZXN0X2xlbnMgPSB2ZWN0b3IoKQ0KICBmb3IgKGkgaW4gMTp0cmlhbHMpIHsNCiAgICBjb2luIDwtIHNhbXBsZShjKCJIIiwgIlQiKSwgbiwgcmVwbGFjZSA9IFRSVUUpDQogICAgY29pbi5ybGUgPC0gcmxlKGNvaW4pDQogICAgbG9uZ2VzdF9ydW5fbGVuID0gdGFwcGx5KGNvaW4ucmxlJGxlbmd0aHMsIGNvaW4ucmxlJHZhbHVlcywgbWF4KVsxXVtbIkgiXV0NCiAgICBsb25nZXN0X2xlbnMgPC0gYyhsb25nZXN0X2xlbnMsIGxvbmdlc3RfcnVuX2xlbikNCiAgfQ0KICANCiAgI2NhbGN1bGF0aW5nIG1lYW4NCiAgbWVhbiA9IHN1bShsb25nZXN0X2xlbnMpL3RyaWFscw0KICBwcmludCgiTWVhbjoiKQ0KICBwcmludChtZWFuKQ0KICANCiAgI3Bsb3R0aW5nDQogIHBsb3RfbHkoeD1sb25nZXN0X2xlbnMsIGhpc3RmdW5jPSdzdW0nLCB0eXBlID0gImhpc3RvZ3JhbSIsIGNvbG9yID0gSSgib3JhbmdlIikpICU+JQ0KICBsYXlvdXQodGl0bGUgPSAiRW1waXJpY2FsIGRpc3RyaWJ1dGlvbiBvZiB0aGUgbG9uZ2VzdCBydW4gbGVuZ2h0cyIsDQogIHhheGlzID0gbGlzdCh0aXRsZSA9ICJMZW5ndGggb2YgdGhlIGxvbmdlc3QgcnVuIiksDQogIHlheGlzID0gbGlzdCh0aXRsZSA9ICJGcmVxdWVuY3kiKSkNCiAgDQp9DQpgYGANCg0KRGlzdHJpYnV0aW9uIG9mIGxlbmdodCBvZiB0aGUgbG9uZ2VzdCBydW4gb2YgSGVhZHMgZm9yIG4gPSAxMDAgdG9zc2VzIG9mIGEgZmFpciBjb2luOg0KDQpgYGB7cn0NCnAgPSAxLzINCm4gPSAxMDANCmxvbmdlc3RfcnVuX2Rpc3RyaWJ1dGlvbihwLG4pDQoNCmBgYA0KDQpFbXBpcmljYWwgZGlzdHJpYnV0aW9uIG9mIGxlbmdodCBvZiB0aGUgbG9uZ2VzdCBydW4gb2YgSGVhZHMgZm9yIG4gPSAxMDAgdG9zc2VzIG9mIGEgZmFpciBjb2luOg0KDQpgYGB7cn0NCmxvbmdlc3RfcnVuX2VtcGlyaWNhbF9kaXN0cmlidXRpb24obikNCmBgYA0KDQpEaXN0cmlidXRpb24gb2YgbGVuZ2h0IG9mIHRoZSBsb25nZXN0IHJ1biBvZiBIZWFkcyBmb3IgbiA9IDEwMDAgdG9zc2VzIG9mIGEgZmFpciBjb2luOg0KDQpgYGB7cn0NCnAgPSAxLzINCm4gPSAxMDAwDQpsb25nZXN0X3J1bl9kaXN0cmlidXRpb24ocCxuKQ0KDQpgYGANCg0KRW1waXJpY2FsIGRpc3RyaWJ1dGlvbiBvZiBsZW5naHQgb2YgdGhlIGxvbmdlc3QgcnVuIG9mIEhlYWRzIGZvciBuID0gMTAwMCB0b3NzZXMgb2YgYSBmYWlyIGNvaW46DQoNCmBgYHtyfQ0KbG9uZ2VzdF9ydW5fZW1waXJpY2FsX2Rpc3RyaWJ1dGlvbihuKQ0KYGBgDQoNCkRpc3RyaWJ1dGlvbiBvZiBsZW5naHQgb2YgdGhlIGxvbmdlc3QgcnVuIG9mIEhlYWRzIGZvciBuID0gMTAwMDAgdG9zc2VzIG9mIGEgZmFpciBjb2luOg0KDQpgYGB7cn0NCnAgPSAxLzINCm4gPSAxMDAwMA0KbG9uZ2VzdF9ydW5fZGlzdHJpYnV0aW9uKHAsbikNCmBgYA0KDQpFbXBpcmljYWwgZGlzdHJpYnV0aW9uIG9mIGxlbmdodCBvZiB0aGUgbG9uZ2VzdCBydW4gb2YgSGVhZHMgZm9yIG4gPSAxMDAwMCB0b3NzZXMgb2YgYSBmYWlyIGNvaW46DQoNCmBgYHtyfQ0KbG9uZ2VzdF9ydW5fZW1waXJpY2FsX2Rpc3RyaWJ1dGlvbihuKQ0KYGBgDQpUaHVzLCB3ZSBjYW4gc2VlIHRoYXQgY2RmIGNvcnJlc3BvbmRzIHRvIHRoZSBlbXBpcmljYWwgY2RmLiBBbiBleHBlY3RlZCB2YWx1ZSBvYnRhaW5lZCBjYWxjdWxhdGluZyBudW1lcmljYWxseSBpcyBjbG9zZSB0byB0aGUgdHJ1ZSBtZWFuLiBUaGlzIGlzIGp1c3RpZmllZCBieSB0aGUgTGF3IG9mIGxhcmdlIG51bWJlcnMuIExMTiBzYXlzIHRoYXQgdGhlIGF2ZXJhZ2Ugb2YgdGhlIHJlc3VsdHMgb2J0YWluZWQgZnJvbSBhIGxhcmdlIG51bWJlciBvZiB0cmlhbHMgc2hvdWxkIGJlIGNsb3NlIHRvIHRoZSBleHBlY3RlZCB2YWx1ZSwgYW5kIHdpbGwgdGVuZCB0byBiZWNvbWUgY2xvc2VyIGFzIG1vcmUgdHJpYWxzIGFyZSBwZXJmb3JtZWQuDQoNCg0K